################################################################################################
################################################################################################
#######  Volha Charnysh. 2024. Uprooted: How post-WWII Population Transfers Remade Europe. CUP
#######  Replication for analysis in Chapter 7.
#######  R version 4.3.1 (2023-06-16) -- "Beagle Scouts"
#######  Platform: aarch64-apple-darwin20 (64-bit)
########################################################################
########################################################################

rm(list=ls())

#Install missing packages
ipak <- function(pkg) {
	new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
	if (length(new.pkg)) 
		install.packages(new.pkg, dependencies = TRUE)
	sapply(pkg, require, character.only = TRUE)
}

packages<-c("arm","plyr","stargazer", "MASS", "CBPS", "ggplot2", "sandwich", "rgdal", "proj4", "maptools", "spdep", "visreg", "lmtest", "xtable", "CBPS","conleyreg", "texreg", "openxlsx")
ipak(pkg = packages)

######################################################
########### Constructing the dataset #################
######################################################

load("chapter7/Gminy_data.RData")

gis<-read.csv("chapter7/MergingGminy.csv") #Data on territorial overlap, computed in ArcGIS
gis2<-merge(gis, map[,c(6,12:22,31)], by.x="Code48", by.y="Code", all.x="TRUE")
head(gis2)

#Calculate share of the total area of 1948 gmina that belongs into a particular contemporary (2013) gmina
#Some numerical variables become characters, so convert back to numeric when necessary.

gis2$areaShare<-gis2$Area48Km2/gis2$AreaKm2
##Recalculate 1948 population data from historical to contemporary gminas based on the overlap
gis2$UrbPop2<-(gis2$Miasto*gis2$Poles)*gis2$areaShare
gis2$Poles2<-gis2$Poles*gis2$areaShare
gis2$fromUSSR2<-gis2$fromUSSR* gis2$areaShare
gis2$fromOtherC2<-as.numeric(gis2$fromOtherC)* gis2$areaShare
gis2$fromCentrP2<-as.numeric(gis2$fromCentrP)* gis2$areaShare
gis2$authocht2<-gis2$authocht* gis2$areaShare
gis2$Men2<-gis2$Men* gis2$areaShare
gis2$TotPop2<-as.numeric(gis2$Total_popu)*gis2$areaShare
gis2$Aged18_59_2<-(as.numeric(gis2$Aged18_59M) +as.numeric(gis2$Aged18_59W))*gis2$areaShare
gis2$Aged60Up2<-(as.numeric(gis2$Aged60Olde) +as.numeric(gis2$Aged60Ol_1))*gis2$areaShare
head(gis2)
summary(gis2$areaShare)


#Add up 1948 population data to the level of 2013 gminas using unique field "Kod_Teryt"
#This dataset includes all intersections, including contemporary municipalities that are split in two by the Oder-Niesse Line.
gis3<-ddply(gis2, .(Kod_Teryt), summarise, TotPop48=sum(TotPop2), Men48=sum(Men2), fromUSSR48=sum(fromUSSR2), fromCP48=sum(fromCentrP2), aut48=sum(authocht2), fromOther48=sum(fromOtherC2), Aged60Up48=sum(Aged60Up2), Aged18_5948=sum(Aged18_59_2), UrbPop48=sum(UrbPop2), Poles48=sum(Poles2))

#This data includes all municipalities fully located in the formerly German Territories.
germdat<-read.csv("chapter7/covariates1939.csv") #N=630

histdata<-merge(germdat, gis3, by.y="Kod_Teryt", by.x="ID", all.x=TRUE)

t<-read.csv("chapter7/data19802000.csv")

## Data at the level of Polish gminas circa 2010 (N=630). 
dat0<-merge(t, histdata, by="ID", all.y=TRUE) 


type<-read.xlsx("chapter7/PODM_1597_XTAB_20231101051336-private2.xlsx", sheet="TABLE")
head(type)
vars<-names(type)[c(1,2,4:111)]
type[vars] <- sapply(type[vars], as.numeric)
head(type)

type$SecDiv1995<-1-((type$SectionA1995/type$Tot1995)^2+(type$SectionB1995/type$Tot1995)^2+(type$SectionC1995/type$Tot1995)^2+(type$SectionD1995/type$Tot1995)^2+(type$SectionE1995/type$Tot1995)^2+(type$SectionF1995/type$Tot1995)^2+(type$SectionG1995/type$Tot1995)^2+(type$SectionH1995/type$Tot1995)^2+(type$SectionI1995/type$Tot1995)^2+(type$SectionJ1995/type$Tot1995)^2+(type$SectionK1995/type$Tot1995)^2+(type$SectionL1995/type$Tot1995)^2+(type$SectionM1995/type$Tot1995)^2+(type$SectionN1995/type$Tot1995)^2+(type$SectionO1995/type$Tot1995)^2+(type$SectionP1995/type$Tot1995)^2+(type$SectionQ1995/type$Tot1995)^2)
type$SecDiv1998 <- 1 - ((type$SectionA1998/type$Tot1998)^2 +(type$SectionB1998/type$Tot1998)^2 +(type$SectionC1998/type$Tot1998)^2 +(type$SectionD1998/type$Tot1998)^2 +(type$SectionE1998/type$Tot1998)^2 + (type$SectionF1998/type$Tot1998)^2 + (type$SectionG1998/type$Tot1998)^2 + (type$SectionH1998/type$Tot1998)^2 + (type$SectionI1998/type$Tot1998)^2 +(type$SectionJ1998/type$Tot1998)^2 + (type$SectionK1998/type$Tot1998)^2 +(type$SectionL1998/type$Tot1998)^2 + (type$SectionM1998/type$Tot1998)^2 +  (type$SectionN1998/type$Tot1998)^2 +(type$SectionO1998/type$Tot1998)^2 + (type$SectionP1998/type$Tot1998)^2 + (type$SectionQ1998/type$Tot1998)^2)
type$SecDiv2000 <- 1 - ((type$SectionA2000/type$Tot2000)^2 + (type$SectionB2000/type$Tot2000)^2 + (type$SectionC2000/type$Tot2000)^2 +(type$SectionD2000/type$Tot2000)^2 + (type$SectionE2000/type$Tot2000)^2 + (type$SectionF2000/type$Tot2000)^2 + (type$SectionG2000/type$Tot2000)^2 +(type$SectionH2000/type$Tot2000)^2 + (type$SectionI2000/type$Tot2000)^2 + (type$SectionJ2000/type$Tot2000)^2 +(type$SectionK2000/type$Tot2000)^2 + (type$SectionL2000/type$Tot2000)^2 + (type$SectionM2000/type$Tot2000)^2 +(type$SectionN2000/type$Tot2000)^2 +(type$SectionO2000/type$Tot2000)^2 +(type$SectionP2000/type$Tot2000)^2 + (type$SectionQ2000/type$Tot2000)^2)

which(dat0$Kod_Teryt %in% type$Code_edited_formerge)
dat0[which(!dat0$Kod_Teryt %in% type$Code_edited_formerge), 1:5] 

dat<-merge(dat0, type[,c("Code_edited_formerge", "SecDiv1995", "SecDiv1998", "SecDiv2000")], by.x="Kod_Teryt", by.y="Code_edited_formerge")

##############################################
########### COMPUTING MAIN VARIABLES #########
##############################################

dat$PctUSSR<-dat$fromUSSR48/(dat$fromUSSR48 +dat$fromCP48 +dat$fromOther48+ dat$aut48)
dat$PctCP<-dat$fromCP48/(dat$fromUSSR48 +dat$fromCP48 +dat$fromOther48+ dat$aut48)
dat$PctOther<-dat$fromOther48/(dat$fromUSSR48 +dat$fromCP48 +dat$fromOther48+ dat$aut48)
dat$PctAut<-dat$aut48/(dat$fromUSSR48 +dat$fromCP48 +dat$fromOther48+ dat$aut48)

dat$PctUSSRWithin<-dat$PctUSSR/(1-dat$PctAut)
dat$PctCentrPWithin<-dat$PctCP/(1-dat$PctAut)
dat$PctOtherCWithin<-dat$PctOther/(1-dat$PctAut)

## Key independent variables: diversity of immigration computed using formula (8) in Alesina, Harnoss, Rapoport. 2016. J Econ Growth 21: 101-138. 
dat$divMig<-dat$PctUSSRWithin*(1-dat$PctUSSRWithin)+ dat$PctCentrPWithin*(1-dat$PctCentrPWithin)+ dat$PctOtherCWithin*(1-dat$PctOtherCWithin)
dat$shareMigrants<-1-dat$PctAut

dat$PctUrb1948<-dat$UrbPop48/dat$Poles48
dat$PctMen1948<-dat$Men48/dat$TotPop48
dat$PctOver6048<-dat$Aged60Up48/dat$TotPop48
dat$ShareAged1859<-dat$Aged18_5948/dat$TotPop48

#Variables from 1939: 
dat$SharePrzemysl1939<-dat$LudnoscPrzemysl/dat$LudnoscOgolem
dat$ShareOver100<-dat$GospOver100/dat$GospodRolne 

##Prussian provinces and Danzig: 
dat$GermanProvince<-as.character(dat$GermanRegion)
dat$GermanProvince[dat$GermanRegion %in% c("Breslau","Liegnitz", "Oppeln")]<-"Silesia"
dat$GermanProvince[dat$GermanRegion %in% c("Allenstein","Gumbinnen", "Koenigsberg")]<-"East Prussia"
dat$GermanProvince[dat$GermanRegion %in% c("Potsdam", "Frankfurt")]<-"Brandenburg"
dat$GermanProvince[dat$GermanRegion %in% c("Koslin","Stettin","Marienwerder")]<-"Pomerania" 

####################################################################
######################## GMINA BUDGETS IN EARLY 1990 ###############
####################################################################

dat$avgProptax9395<-(dat$PropTax95+ dat$PropTax94+dat$PropTax93)/3
dat$PropTax9395PC<-((dat$NieruchW95+ dat$nier94+ dat$PropW93)/dat$ludn9306)/3
dat$Exp9395pc<-(dat$TotExp93/dat$ludn9306 +dat$Wydatki95/dat$ludn9306 +dat$wyd94/dat$ludn9306)/3
dat$Exp9394pc<-(dat$TotExp93/dat$ludn9306 +dat$wyd94/dat$ludn9306)/2
dat$CapSpPC<-(dat$WydInw95+dat$wyd_inw94+ dat$CapitalSp93)/dat$ludn9306
dat$CapSpPC9394<-(dat$wyd_inw94+ dat$CapitalSp93)/dat$ludn9306

###################################################################
######################## ECONOMIC OUTCOMES IN 1980s ###############
###################################################################

dat$ShareRzemPr82<-dat$WRzemPryw1982/dat$Ludnosc1980
dat$WRzemPrPerThs82<-dat$WRzemPryw1982/(dat$Ludnosc1980/1000)
dat$TVsPerThs1980<-dat$TVs1980/(dat$Ludnosc1980/1000)
dat$TelsPerThs1980<-dat$Phones1980/(dat$Ludnosc1980/1000)
dat$ShareUspPerThs1982<-dat$WGospUspol1982/(dat$Ludnosc1980/1000)
dat$PunktyPerThous1980<-dat$Punkty/(dat$Ludnosc1980/1000)

## Additional variables for g-estimation and appendix 
dat$PtArrived7188<-(dat$Arrived1971_78No+dat$Arrived1981_88No)/dat$Pop1988 
dat$ShareRoln<-dat$GospUspRolnictwo/dat$Ludnosc1980 
dat$ShareWyzsze78<-(dat$WyszeW78+ dat$niepWysze78+ dat$policealne78)/dat$Pop15Above78
dat$ShareWyzsze88<-dat$TertEdu1988/dat$Pop10to191988  
dat$ShareWyzsze02<-(dat$Wyszce02+dat$Policeal02)/dat$Pop2002        
dat$EduDiv88<-1-(dat$ShareWyzsze88^2+(dat$VocEdu1988/dat$Pop10to191988)^2+(dat$SecEdu1988/dat$Pop10to191988)^2+(dat$PrimEdu1988/dat$Pop10to191988)^2)


dat$PtNotFromBirth<-1-dat$FromBirthNo1988/dat$Pop1988
dat$PtArrived7178<-dat$Arrived1971_78No/dat$Pop1988
dat$PtArrived8188<-dat$Arrived1981_88No/dat$Pop1988


dat$LibsPerThous1980<-dat$Biblioteki1982/(dat$Ludnosc1980/1000)
dat$SchoolsPerThous1980<-dat$Szkoly1982/(dat$Ludnosc1980/1000)
dat$WRolnUspRol1982<-dat$GospUspRolnictwo/(dat$Ludnosc1980/1000)
dat$WPrzemUsp1982<-dat$GospUspPrzemysl/(dat$Ludnosc1980/1000)
dat$SrWyrownPts80<-dat$SrodkiWyrownawcze1980/(dat$Ludnosc1980/1000)
dat$DochodyBudzetowePts1980 <-dat$DochodyBudzetowe1980/(dat$Ludnosc1980/1000)
dat$DochodyWlasnePts1980 <-dat$DochodyBudzetoweWlasne1980/(dat$Ludnosc1980/1000)

###################################################################
######################## ECONOMIC OUTCOMES IN 1990s ###############
###################################################################

dat$persIncTax1993pc<-dat$SharePIT93/dat$ludn9306
dat$persIncTax1995pc<-dat$IncPers95/dat$Pop1995
dat$persIncTax1998pc<-dat$IncPers98/dat$Pop1998
dat$persIncTax2000pc<-dat$IncPers00/dat$Pop2000

dat$PodmPryw1995perthous<-dat$PrywAll95/(dat$Pop1995/1000)
dat$PodmPryw1998perthous<-dat$PrywAll98/(dat$Pop1998/1000)
dat$PodmPryw2000perthous<-dat$PrywAll00/(dat$Pop2000/1000)


########################################################################
####  TABLE A.13 in the Appendix: DESCRIPTIVE STATITISTICS  ############
########################################################################


stargazer(dat[,c("divMig", "shareMigrants", "PctUSSR", "PctCP","PctAut", "PctOther","PctMen1948", "ShareAged1859", 
                 "TotPop48","PctUrb1948", "SharePrzemysl1939", "ShareOver100",  "distrail48", "DistPow1950", 
                 "DistLandBorder", "OSPna10tys1995", "Straz2007","avgProptax9395", "PropTax9395PC","ShareUspPerThs1982",
                 "WRzemPrPerThs82", "PunktyPerThous1980", "TVsPerThs1980","TelsPerThs1980",  "Szkoly1982", "Biblioteki1982", 
                 "ShareWyzsze78", "ShareWyzsze88", "ShareWyzsze02", "persIncTax1993pc", "persIncTax1995pc", "persIncTax1998pc", 
                 "persIncTax2000pc","PodmPryw1995perthous", "PodmPryw1998perthous", "PodmPryw2000perthous")], summary.stat=c("n", "mean","sd",  "min", "max"), digits=2,
          covariate.labels=c("Migrant Diversity (1948)", "Share Migrants (1948)", "Share from USSR (1948)", "Share from Central Poland (1948)", "Share Autochthonous (1948)" ,  "Share from Europe (1948)","Share Men (1948)", "Share Aged 18-59 (1948)", "Total Population (1948)", "Share Urban (1948)", "Share in Industry (1939)", "Share Farms Over 100 (1939)", "Distance to Railway (1948)", "Distance to County Seat (1950)", "Distance to Border (km)", "Volunteer Fire Brigades per 1,000 (1995-96) ", "Municipal Guard (2007)","Property Tax Rate (1993-95)", "Property Tax Revenue per capita, Zł. (1993-95) ", "Employed in Socialized Economy (1982)","In Private Handicrafts per 1,000 (1982)", "Shops per 1,000 (1980)", "TVs per 1,000 (1980)","Phones per 1,000 (1980)",  "Schools per 1000 (1982)", "Libraries per 1000 (1982)", "Share w/ Higher Edu (1978)", "Share w/ Higher Edu (1988)", "Share w/ Higher Edu (2002)", "Personal Income Tax per capita, Zł. (1993)", "Personal Income Tax per capita, Zł. (1995)",  "Personal Income Tax per capita, Zł. (1998)",  "Personal Income Tax per capita, Zł. (2000)",  "Private Enterprises per 1,000 (1995)", "Private Enterprises per 1000 (1998)",  "Private Enterprises per 1000 (2000)"))


####################################################################################
###########  TABLE 7.2: Migrant Diversity and Socio-Economic Covariates #############
####################################################################################

corrs<-rbind(cbind(cor.test(dat$divMig, dat$divMig)$estimate, summary(lm(divMig~ divMig, data=dat))$r.squared,
      cor.test(dat$divMig, dat$shareMigrants)$estimate, summary(lm(divMig~ shareMigrants, data=dat))$r.squared),
cbind(cor.test(dat$divMig, dat$PctUrb1948)$estimate,summary(lm(divMig~ PctUrb1948, data=dat))$r.squared,
      cor.test(dat$shareMigrants, dat$PctUrb1948)$estimate,summary(lm(shareMigrants~ PctUrb1948, data=dat))$r.squared), 
cbind(cor.test(dat$divMig, dat$SharePrzemysl1939)$estimate, summary(lm(divMig~ SharePrzemysl1939, data=dat))$r.squared,
      cor.test(dat$shareMigrants, dat$SharePrzemysl1939)$estimate, summary(lm(shareMigrants~ SharePrzemysl1939, data=dat))$r.squared), 
cbind(cor.test(dat$divMig, dat$ShareOver100)$estimate, summary(lm(divMig~ ShareOver100, data=dat))$r.squared,
      cor.test(dat$shareMigrants, dat$ShareOver100)$estimate, summary(lm(shareMigrants~ ShareOver100, data=dat))$r.squared),
cbind(cor.test(dat$divMig, log(dat$TotPop48))$estimate, summary(lm(divMig~ log(dat$TotPop48), data=dat))$r.squared,
      cor.test(dat$shareMigrants, log(dat$TotPop48))$estimate, summary(lm(shareMigrants~ log(dat$TotPop48), data=dat))$r.squared), 
cbind(cor.test(dat$divMig, dat$DistPow1950)$estimate, summary(lm(divMig~ DistPow1950, data=dat))$r.squared,
      cor.test(dat$shareMigrants, dat$DistPow1950)$estimate, summary(lm(shareMigrants~ DistPow1950, data=dat))$r.squared),
cbind(cor.test(dat$divMig, dat$distrail48)$estimate, summary(lm(divMig~ distrail48, data=dat))$r.squared,
      cor.test(dat$shareMigrants, dat$distrail48)$estimate, summary(lm(shareMigrants~ distrail48, data=dat))$r.squared),
cbind(cor.test(dat$divMig, dat$DistLandBorder)$estimate, summary(lm(divMig~ DistLandBorder, data=dat))$r.squared,
      cor.test(dat$shareMigrants, dat$DistLandBorder)$estimate, summary(lm(shareMigrants~ DistLandBorder, data=dat))$r.squared))

corrs2<-as.data.frame(corrs, col.names=c("Variable","Pearson Correlation", "R^2", "Pearson Correlation", "R^2"), 
                      row.names=c("Migrant diversity (1948)", "Share urban (1948)","Share in industry (1939)", "Share farms over 100 ha (1939)", "Ln population (1948)", "Distance to county seat (1950)", "Distance to raiway (1948)", "Distance to German border"))
names(corrs2)<-c("Pearson Correlation", "R^2", "Pearson Correlation", "R^2")

xtable(corrs2, digits=3)



###############################################################################
################## Table 7.3: Economic activity in the 1980s  #################
###############################################################################
dat$LnTotPop48<-log(dat$TotPop48)
dat$LnPunktyPerThous1980<-log(dat$PunktyPerThous1980)
dat$LnTelsPerThs1980<-log(dat$TelsPerThs1980)
dat$LnTVsPerThs1980<-log(dat$TVsPerThs1980)

uspC<-lm(ShareUspPerThs1982~ divMig + shareMigrants + PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat)
summary(uspC)

rzemC<-lm(WRzemPrPerThs82~ divMig + shareMigrants + PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat)
summary(rzemC)

punktyC<-lm(LnPunktyPerThous1980 ~ divMig + shareMigrants+ PctUrb1948 +SharePrzemysl1939 + ShareOver100 + LnTotPop48 +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat)

telC<-lm(LnTelsPerThs1980 ~ divMig + shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 +DistPow1950 +DistLandBorder + distrail48+ GermanProvince, data=dat) 
summary(telC) 

tvC<-lm(LnTVsPerThs1980~ divMig + shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat)
summary(tvC)


ses1 <- list(coeftest(uspC, vcov = vcovHC(uspC, type="HC1"))[,2], coeftest(rzemC, vcov = vcovHC(rzemC, type="HC1"))[,2], coeftest(punktyC, vcov = vcovHC(punktyC, type="HC1"))[,2], coeftest(telC, vcov = vcovHC(telC, type="HC1"))[,2], 
             coeftest(tvC, vcov = vcovHC(tvC, type="HC1"))[,2])

pvals1 <- list(coeftest(uspC, vcov = vcovHC(uspC, type="HC1"))[,4], coeftest(rzemC, vcov = vcovHC(rzemC, type="HC1"))[,4], 
               coeftest(punktyC, vcov = vcovHC(punktyC, type="HC1"))[,4], coeftest(telC, vcov = vcovHC(telC, type="HC1"))[,4], 
               coeftest(tvC, vcov = vcovHC(tvC, type="HC1"))[,4])


stargazer(uspC, rzemC, punktyC, telC, tvC,  se = ses1, p=pvals1, digits=2, style="APSR",  
          omit=c("GermanProvince", "Constant", "PctUrb1948", "SharePrzemysl1939", "ShareOver100", "DistPow1950", "DistLandBorder", "distrail48", "TotPop48"), omit.stat=c("rsq","ser","f"), 
          covariate.labels = c("Migrant Diversity", "Migrant Share"), 
          add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes", "Yes", "Yes"), 
          c("Province Fixed Effects", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes")), 
          star.cutoffs=c(0.1, 0.05, 0.01), notes = c("* p<0.1; ** p<0.05; *** p<0.01"), notes.append = F)  


##############################################################
################## Figure 7.2: Standardized coefs   ##########
##############################################################
cse = function(reg) {
  rob = sqrt(diag(vcovHC(reg, type = "HC1")))
  return(rob)
}
hc1<-function(x) vcovHC(x, type="HC1")  

### FIGURE: 
ols1s<-standardize(uspC, unchanged="GermanProvince", standardize.y=TRUE)
ols2s<-standardize(rzemC, unchanged="GermanProvince", standardize.y=TRUE)
ols3s<-standardize(punktyC, unchanged="GermanProvince", standardize.y=TRUE)
ols4s<-standardize(telC, unchanged="GermanProvince", standardize.y=TRUE)
ols5s<-standardize(tvC, unchanged="GermanProvince", standardize.y=TRUE)

#### Let's plot the coefficients on "ShareExpellees1950" and "DivOrMig1950"

plot.outDiversity <- sapply(list(ols1s, ols2s,  ols3s, ols4s, ols5s), function(x){
  coeftest(x)[2,1:2] 
})


plot.outMigr <- sapply(list(ols1s, ols2s,  ols3s, ols4s, ols5s), function(x){ 
  coeftest(x)[3,1:2] 
})

plot.outMigr <- as.data.frame(t(plot.outMigr))
plot.outDiversity <- as.data.frame(t(plot.outDiversity))

plot.outAllS <- rbind(plot.outMigr, plot.outDiversity)

plot.outAllS$dv <- factor(rep(c("Socialized economy", "Private handicrafts", "Ln shops", "Ln phones", "Ln TVs"), 2), 
                          levels=c("Socialized economy", "Private handicrafts", "Ln shops", "Ln phones", "Ln TVs")) 
plot.outAllS$iv <- c(rep("Migrant share",5), rep("Migrant diversity", 5))
plot.outAllS$lb <- plot.outAllS$Estimate - 1.96*plot.outAllS$`Std. Error`
plot.outAllS$ub <- plot.outAllS$Estimate + 1.96*plot.outAllS$`Std. Error`

fig.StSocialism = ggplot(
  data = plot.outAllS,
  mapping = aes(
    x = dv,
    y = Estimate,
    shape = iv
  )
) +
  geom_hline(yintercept = 0, colour = "grey") +
  geom_pointrange(mapping = aes(ymin = lb, ymax = ub), alpha = 0.75, position = position_dodge(width = 0.1), size=1) +
  scale_y_continuous(name = "Average partial effect") +scale_shape_manual(values = c(2, 16))+
  scale_x_discrete(name = NULL, labels = c("Socialized economy", "Private handicrafts", "Ln shops", "Ln phones", "Ln TVs")) +
  theme_bw() +
  theme(
    text=element_text(size=12, family="sans", color="black"),
    legend.position = "bottom",
    legend.text = element_text(size = 12,family="sans"),
    axis.text.x = element_text(angle = 47, hjust = 1, size=12, family="sans", color="black"),
    legend.title = element_blank(),
    plot.margin = unit(c(.5,.5,.5,1), "cm")
  )

ggsave("Figure7.2.jpg",fig.StSocialism, width = 8, height = 6)


#########################################################################################
#### Table 7.4. Economic Outcomes with heteroskedasticity-robust standard errors  #######
#########################################################################################

dat$LnpersIncTax1993pc<-log(dat$persIncTax1993pc)
dat$LnpersIncTax1995pc<-log(dat$persIncTax1995pc)
dat$LnpersIncTax1998pc<-log(dat$persIncTax1998pc)
dat$LnpersIncTax2000pc<-log(dat$persIncTax2000pc)
dat$LnPodmPryw1995perthous<-log(dat$PodmPryw1995perthous)
dat$LnPodmPryw1998perthous<-log(dat$PodmPryw1998perthous)
dat$LnPodmPryw2000perthous<-log(dat$PodmPryw2000perthous)

inc1993pcNM4<-lm(LnpersIncTax1993pc ~  divMig+shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 + DistPow1950+DistLandBorder + distrail48+ GermanProvince, data=dat)
inc1995pcNM4<-lm(LnpersIncTax1995pc ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 + DistPow1950+DistLandBorder + distrail48+ GermanProvince, data=dat)
inc1998pcNM4<-lm(LnpersIncTax1998pc ~  divMig+shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 + DistPow1950+DistLandBorder + distrail48+ GermanProvince, data=dat)
inc2000pcNM4<-lm(LnpersIncTax2000pc ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 + DistPow1950+DistLandBorder + distrail48+ GermanProvince, data=dat)
ent1995pcNM4<-lm(LnPodmPryw1995perthous ~  divMig+shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 + DistPow1950+DistLandBorder + distrail48+ GermanProvince, data=dat)
ent1998pcNM4<-lm(LnPodmPryw1998perthous ~  divMig+shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 + DistPow1950+DistLandBorder + distrail48+ GermanProvince, data=dat)
ent2000pcNM4<-lm(LnPodmPryw2000perthous ~  divMig+shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 + DistPow1950+DistLandBorder + distrail48+ GermanProvince, data=dat)

ses1 <- list(coeftest(inc1993pcNM4, vcov = vcovHC(inc1993pcNM4, type="HC1"))[,2], coeftest(inc1995pcNM4, vcov = vcovHC(inc1995pcNM4, type="HC1"))[,2], coeftest(inc1998pcNM4, vcov = vcovHC(inc1998pcNM4, type="HC1"))[,2], 
             coeftest(inc2000pcNM4, vcov = vcovHC(inc2000pcNM4, type="HC1"))[,2], coeftest(ent1995pcNM4, vcov = vcovHC(ent1995pcNM4, type="HC1"))[,2], 
             coeftest(ent1998pcNM4, vcov = vcovHC(ent1998pcNM4, type="HC1"))[,2], coeftest(ent2000pcNM4, vcov = vcovHC(ent2000pcNM4, type="HC1"))[,2])

pvals1 <- list(coeftest(inc1993pcNM4, vcov = vcovHC(inc1993pcNM4, type="HC1"))[,4], coeftest(inc1995pcNM4, vcov = vcovHC(inc1995pcNM4, type="HC1"))[,4], coeftest(inc1998pcNM4, vcov = vcovHC(inc1998pcNM4, type="HC1"))[,4], 
               coeftest(inc2000pcNM4, vcov = vcovHC(inc2000pcNM4, type="HC1"))[,4], coeftest(ent1995pcNM4, vcov = vcovHC(ent1995pcNM4, type="HC1"))[,4], 
               coeftest(ent1998pcNM4, vcov = vcovHC(ent1998pcNM4, type="HC1"))[,4], coeftest(ent2000pcNM4, vcov = vcovHC(ent2000pcNM4, type="HC1"))[,4])


stargazer(inc1993pcNM4,inc1995pcNM4, inc1998pcNM4, inc2000pcNM4, ent1995pcNM4, ent1998pcNM4, ent2000pcNM4,  se = ses1, p=pvals1, digits=2, style="APSR",  
          omit=c("GermanProvince", "Constant", "PctUrb1948", "SharePrzemysl1939", "ShareOver100", "DistPow1950", "DistLandBorder", "distrail48", "TotPop48"), omit.stat=c("rsq","ser","f"), 
          covariate.labels = c("Migrant Diversity", "Migrant Share"), 
          add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes", "Yes", "Yes"), 
                         c("Province Fixed Effects", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes")), 
          star.cutoffs=c(0.1, 0.05, 0.01), notes = c("* p<0.1; ** p<0.05; *** p<0.01"), notes.append = F)  


##########################################################
###### Figure 7.3: Economic activity in the 1990s ########
##########################################################

m0s<-standardize(inc1993pcNM4, unchanged="GermanProvince", standardize.y=TRUE)
m1s<-standardize(inc1995pcNM4, unchanged="GermanProvince", standardize.y=TRUE)
m2s<-standardize(inc1998pcNM4, unchanged="GermanProvince", standardize.y=TRUE)
m3s<-standardize(inc2000pcNM4, unchanged="GermanProvince", standardize.y=TRUE)
m4s<-standardize(ent1995pcNM4, unchanged="GermanProvince", standardize.y=TRUE)
m5s<-standardize(ent1998pcNM4, unchanged="GermanProvince", standardize.y=TRUE)
m6s<-standardize(ent2000pcNM4, unchanged="GermanProvince", standardize.y=TRUE)


plot.outDiversity <- sapply(list(m0s, m1s, m2s,  m3s, m4s, m5s, m6s), function(x){
  coeftest(x)[2,1:2] 
})

plot.outMigr<- sapply(list(m0s,m1s, m2s,  m3s, m4s, m5s, m6s), function(x){ 
  coeftest(x)[3,1:2]  
})

plot.outDiversity <- as.data.frame(t(plot.outDiversity))
plot.outMigr <- as.data.frame(t(plot.outMigr))

plot.outAllS <- rbind(plot.outMigr, plot.outDiversity)

plot.outAllS$dv <- factor(rep(c("Income tax: 1993", "Income tax: 1995", "Income tax: 1998", "Income tax: 2000", "Enterprises: 1995", "Enterprises: 1998", "Enterprises: 2000"), 2), 
                          levels=c("Income tax: 1993", "Income tax: 1995", "Income tax: 1998", "Income tax: 2000", "Enterprises: 1995", "Enterprises: 1998", "Enterprises: 2000")) 
plot.outAllS$iv <- c(rep("Migrant share",7), rep("Migrant diversity", 7))
plot.outAllS$lb <- plot.outAllS$Estimate - 1.96*plot.outAllS$`Std. Error`
plot.outAllS$ub <- plot.outAllS$Estimate + 1.96*plot.outAllS$`Std. Error`

fig.econ1990 = ggplot(
  data = plot.outAllS,
  mapping = aes(
    x = dv,
    y = Estimate,
    shape = iv
  )
) +
  geom_hline(yintercept = 0, colour = "grey") +
  geom_pointrange(mapping = aes(ymin = lb, ymax = ub), alpha = 0.75, position = position_dodge(width = 0.1), size=1) +
  scale_y_continuous(name = "Average partial effect") +scale_shape_manual(values = c(2, 16))+
  scale_x_discrete(name = NULL, labels = c("Income tax: 1993", "Income tax: 1995", "Income tax: 1998", "Income tax: 2000", "Enterprises: 1995", "Enterprises: 1998", "Enterprises: 2000")) +
  theme_bw() +
  theme(
    text=element_text(size=12, family="sans", color="black"),
    legend.position = "bottom",
    legend.text = element_text(size = 12,family="sans"),
    axis.text.x = element_text(angle = 47, hjust = 1, size=12, family="sans", color="black"),
    legend.title = element_blank(),
    plot.margin = unit(c(.5,.5,.5,1), "cm")
  )

ggsave("Figure7.3.jpg",fig.econ1990, width = 8, height = 6)



####################################################
###### Conley errors for Tables 7.3 and 7.4 ########
####################################################

load("chapter7/contempgm.RData") #Load "shape" with coordinates 

proj4string(shape)<- CRS("+proj=tmerc +lat_0=0 +lon_0=19 +k=0.9993 +x_0=500000 +y_0=-5300000 +ellps=GRS80 +units=m +no_defs")

shape1 <- spTransform(shape, CRS("+proj=longlat +datum=WGS84"))
xy <- coordinates(shape1) #polygon center coords 
test<-as.data.frame(cbind(shape, xy))
names(test)[7:8]<-c("lon", "lat")

dat$Kod_Teryt %in% test$Kod_Teryt #check

dat1<-merge(test[which(test$Kod_Teryt %in% dat$Kod_Teryt),c(1,5, 7,8)], dat, by="Kod_Teryt", all.x=TRUE) 

## Conley errors for state socialist period: 

uspC_Conley<-conleyreg(ShareUspPerThs1982~divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat1, 100, lat = "lat", lon = "lon")
rzemC_Conley<-conleyreg(WRzemPrPerThs82~ divMig+shareMigrants+  PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 +DistPow1950+ DistLandBorder + distrail48+ GermanProvince,data=dat1, 100, lat = "lat", lon = "lon")
punktyC_Conley<-conleyreg(LnPunktyPerThous1980 ~ divMig+ shareMigrants+ PctUrb1948 +SharePrzemysl1939 + ShareOver100 + LnTotPop48 +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat1, 100, lat = "lat", lon = "lon")
telC_conley<-conleyreg(LnTelsPerThs1980 ~ divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 +DistPow1950 +DistLandBorder + distrail48+ GermanProvince, data=dat1, 100, lat = "lat", lon = "lon")
tvC_Conley<-conleyreg(LnTVsPerThs1980~  divMig+shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat1, 100, lat = "lat", lon = "lon")

texreg(
  l = list(uspC_Conley, rzemC_Conley, punktyC_Conley, telC_conley, tvC_Conley),
  override.se=list(uspC_Conley[,2], rzemC_Conley[,2],  punktyC_Conley[,2], telC_conley[,2],  tvC_Conley[,2]),
  override.pval=list(uspC_Conley[,4], rzemC_Conley[,4], punktyC_Conley[,4], telC_conley[,4], tvC_Conley[,4]),
  custom.coef.names = c('Migrant diversity','Share migrants', "Share urban", "Share in industry",
                        "Share farms over 100 ha", "Ln population", "Distance to state",
                        "Distance to border", "Distance to railway"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  omit.coef = "GermanProvince|Intercept"
)


inc1993pc<-conleyreg(log(persIncTax1993pc)~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat1, 100, lat = "lat", lon = "lon")
inc1995pc<-conleyreg(log(persIncTax1995pc)~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat1, 100, lat = "lat", lon = "lon")
inc1998pc<-conleyreg(log(persIncTax1998pc)~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat1, 100, lat = "lat", lon = "lon")
inc2000pc<-conleyreg(log(persIncTax2000pc)~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat1, 100, lat = "lat", lon = "lon")


#Private enterprises in 1995
ent1995pc<-conleyreg(log(PodmPryw1995perthous)~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanProvince, data=dat1, 100, lat = "lat", lon = "lon")
ent1998pc<-conleyreg(log(PodmPryw1998perthous)~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanProvince, data=dat1, 100, lat = "lat", lon = "lon")
ent2000pc<-conleyreg(log(PodmPryw2000perthous)~  divMig+ shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanProvince, data=dat1, 100, lat = "lat", lon = "lon")

##Table 7.4 errors in brackets

texreg(
  l = list(inc1993pc,inc1995pc,inc1998pc,inc2000pc,ent1995pc,ent1998pc,ent2000pc),
  override.se=list(inc1993pc[,2], inc1995pc[,2],  inc1998pc[,2], inc2000pc[,2],  ent1995pc[,2], ent1998pc[,2], ent2000pc[,2]),
  override.pval=list(inc1993pc[,4], inc1995pc[,4], inc1998pc[,4], inc2000pc[,4], ent1995pc[,4], ent1998pc[,4], ent2000pc[,4]),
  custom.coef.names = c('Migrant diversity','Share migrants', "Share urban", "Share in industry",
                        "Share farms over 100 ha", "Ln population", "Distance to state",
                        "Distance to border", "Distance to railway"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  omit.coef = "GermanProvince|Intercept"
)

######################################################
####### Table A.14: Models excluding Cities     ######
######################################################

inc1995nocities<-lm(LnpersIncTax1995pc ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat[dat$PctUrb1948<0.85,])
inc1998nocities<-lm(LnpersIncTax1998pc ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat[dat$PctUrb1948<0.85,])
inc2000nocities<-lm(LnpersIncTax2000pc~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat[dat$PctUrb1948<0.85,])
ent1995nocities<-lm(LnPodmPryw1995perthous ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat[dat$PctUrb1948<0.85,])
ent1998nocities<-lm(LnPodmPryw1998perthous ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat[dat$PctUrb1948<0.85,])
ent2000nocities<-lm(LnPodmPryw2000perthous ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat[dat$PctUrb1948<0.85,])

ses1 <- list(coeftest(inc1995nocities, vcov = vcovHC(inc1995nocities, type="HC1"))[,2], coeftest(inc1998nocities, vcov = vcovHC(inc1998nocities, type="HC1"))[,2],
             coeftest(inc2000nocities, vcov = vcovHC(inc2000nocities, type="HC1"))[,2])

pvals1 <- list(coeftest(ent1995nocities, vcov = vcovHC(ent1995nocities, type="HC1"))[,4], coeftest(ent1998nocities, vcov = vcovHC(ent1998nocities, type="HC1"))[,4], 
               coeftest(ent2000nocities, vcov = vcovHC(ent2000nocities, type="HC1"))[,4])


texreg(
  l = list(inc1995nocities, inc1998nocities, inc2000nocities, ent1995nocities, ent1998nocities, ent2000nocities),
  override.se=ses1,
  override.pval=pvals1,
  custom.coef.names = c('Migrant diversity','Share migrants',  "Share urban", "Share in industry",
                        "Share farms over 100 ha", "Ln population", "Distance to state",
                        "Distance to border", "Distance to railway"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  omit.coef = "GermanProvince|Intercept"
)

##Conley errors for this table:
inc1995nocitiesConley<-conleyreg(LnpersIncTax1995pc ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat1[dat1$PctUrb1948<0.85,], 100, lat = "lat", lon = "lon")
inc1998nocitiesConley<-conleyreg(LnpersIncTax1998pc ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat1[dat1$PctUrb1948<0.85,], 100, lat = "lat", lon = "lon")
inc2000nocitiesConley<-conleyreg(LnpersIncTax2000pc~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat1[dat1$PctUrb1948<0.85,], 100, lat = "lat", lon = "lon")
ent1995nocitiesConley<-conleyreg(LnPodmPryw1995perthous ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat1[dat$PctUrb1948<0.85,], 100, lat = "lat", lon = "lon")
ent1998nocitiesConley<-conleyreg(LnPodmPryw1998perthous ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat1[dat1$PctUrb1948<0.85,], 100, lat = "lat", lon = "lon")
ent2000nocitiesConley<-conleyreg(LnPodmPryw2000perthous ~  divMig+ shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+ GermanProvince, data=dat1[dat1$PctUrb1948<0.85,], 100, lat = "lat", lon = "lon")

texreg(
  l = list(inc1995nocitiesConley, inc1998nocitiesConley, inc2000nocitiesConley, ent1995nocitiesConley, ent1998nocitiesConley, ent2000nocitiesConley),
  override.se=list(inc1995nocitiesConley[,2], inc1998nocitiesConley[,2],  inc2000nocitiesConley[,2], ent1995nocitiesConley[,2],  ent1998nocitiesConley[,2], ent2000nocitiesConley[,2]),
  override.pval=list(inc1995nocitiesConley[,4], inc1998nocitiesConley[,4], inc2000nocitiesConley[,4], ent1995nocitiesConley[,4], ent1998nocitiesConley[,4], ent2000nocitiesConley[,4]),
  custom.coef.names = c('Migrant diversity','Share migrants',  "Share urban", "Share in industry",
                        "Share farms over 100 ha", "Ln population", "Distance to state",
                        "Distance to border", "Distance to railway"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  omit.coef = "GermanProvince|Intercept"
)




###################################################################
########### Table A.15: Share of forced migrants as treatment #####
###################################################################
dat$ShareWyzsze78<-(dat$WyszeW78+ dat$niepWysze78+ dat$policealne78)/dat$Pop15Above78
dat$ShareWyzsze88<-dat$TertEdu1988/dat$Pop10to191988 
dat$ShareWyzsze88<-ifelse(dat$ShareWyzsze88>1,NA, dat$ShareWyzsze88)
dat$EduDiv88<-1-(dat$ShareWyzsze88^2+(dat$VocEdu1988/dat$Pop10to191988)^2+(dat$SecEdu1988/dat$Pop10to191988)^2+(dat$PrimEdu1988/dat$Pop10to191988)^2)


dat$PctUSSRm<-dat$fromUSSR48/(dat$fromUSSR48 +dat$fromCP48 +dat$fromOther48)
dat$PctCPm<-dat$fromCP48/(dat$fromUSSR48 +dat$fromCP48 +dat$fromOther48+ dat$aut48)


Inc1995Kresy<-lm(LnpersIncTax1995pc ~  PctUSSRm+shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 + DistPow1950+DistLandBorder + distrail48+ GermanProvince, data=dat)
Inc1998Kresy<-lm(LnpersIncTax1998pc ~  PctUSSRm+shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 + DistPow1950+DistLandBorder + distrail48+ GermanProvince, data=dat)
ent1995pcKresy<-lm(LnPodmPryw1995perthous ~  PctUSSRm+shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 + DistPow1950+DistLandBorder + distrail48+ GermanProvince, data=dat)
ent1998pcKresy<-lm(LnPodmPryw1998perthous ~  PctUSSRm+shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 + DistPow1950+DistLandBorder + distrail48+ GermanProvince, data=dat)
edu1988pcKresy<-lm(ShareWyzsze88~  PctUSSRm+shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 + DistPow1950+DistLandBorder + distrail48+ GermanProvince, data=dat)
edu2002pcKresy<-lm(ShareWyzsze02~  PctUSSRm+shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 + DistPow1950+DistLandBorder + distrail48+ GermanProvince, data=dat)


ses1 <- list(coeftest(Inc1995Kresy, vcov = vcovHC(Inc1995Kresy, type="HC1"))[,2], 
             coeftest(Inc1998Kresy, vcov = vcovHC(Inc1998Kresy, type="HC1"))[,2], 
             coeftest(ent1995pcKresy, vcov = vcovHC(ent1995pcKresy, type="HC1"))[,2], 
             coeftest(ent1998pcKresy, vcov = vcovHC(ent1998pcKresy, type="HC1"))[,2],
             coeftest(edu1988pcKresy, vcov = vcovHC(edu1988pcKresy, type="HC1"))[,2],
             coeftest(edu2002pcKresy, vcov = vcovHC(edu1988pcKresy, type="HC1"))[,2]) 

pvals1 <- list(coeftest(Inc1995Kresy, vcov = vcovHC(Inc1995Kresy, type="HC1"))[,4],
               coeftest(Inc1998Kresy, vcov = vcovHC(Inc1998Kresy, type="HC1"))[,4], 
               coeftest(ent1995pcKresy, vcov = vcovHC(ent1995pcKresy, type="HC1"))[,4],
               coeftest(ent1998pcKresy, vcov = vcovHC(ent1998pcKresy, type="HC1"))[,4], 
               coeftest(edu1988pcKresy, vcov = vcovHC(edu1988pcKresy, type="HC1"))[,4],
               coeftest(edu2002pcKresy, vcov = vcovHC(edu2002pcKresy, type="HC1"))[,4]) 


texreg(
  l = list(Inc1995Kresy,Inc1998Kresy, ent1995pcKresy, ent1998pcKresy, edu1988pcKresy, edu2002pcKresy),
  override.se=ses1,
  override.pval=pvals1,
  custom.coef.names = c('Share forced migrants', "Share migrants","Share urban", "Share in industry",
                        "Share farms over 100 ha", "Ln population", "Distance to state",
                        "Distance to western border", "Distance to railway"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  omit.coef = "GermanProvince|Intercept"
)


####################################################################
#######  Table A16: Fiscal Capacity and downstream outcomes  #######
####################################################################

propR9395F<-lm(avgProptax9395 ~  divMig+ shareMigrants+PctUrb1948+DistPow1950+ distrail48+ ShareOver100 + DistLandBorder+DistPow1950 +log(TotPop48)+ SharePrzemysl1939+GermanProvince, data=dat) 
summary(propR9395F) 

propA9395F<-lm(log(PropTax9395PC) ~divMig+ shareMigrants+PctUrb1948+DistPow1950+ distrail48+ ShareOver100 + DistLandBorder+ distrail48+ DistPow1950+log(TotPop48)+ SharePrzemysl1939+ GermanProvince, data=dat)
summary(propA9395F)

spendInvT2<-lm(log(CapSpPC)~log(PropTax9395PC)+PctUrb1948+DistPow1950+ distrail48+ ShareOver100 + DistLandBorder+ log(TotPop48)+ SharePrzemysl1939+ GermanProvince, data=dat)

income<-lm(LnpersIncTax2000pc~log(PropTax9395PC) +PctUrb1948+DistPow1950+ distrail48+ ShareOver100 + DistLandBorder+ distrail48+ DistPow1950+log(TotPop48)+ SharePrzemysl1939+ GermanProvince, data=dat)
summary(income)
enterprises<-lm(LnPodmPryw2000perthous~log(PropTax9395PC) +PctUrb1948+DistPow1950+ distrail48+ ShareOver100 + DistLandBorder+ distrail48+ DistPow1950+log(TotPop48)+ SharePrzemysl1939+ GermanProvince, data=dat)
summary(enterprises)

ses1 <- list(coeftest(propR9395F, vcov = vcovHC(propR9395F, type="HC1"))[,2], coeftest(propA9395F, vcov = vcovHC(propA9395F, type="HC1"))[,2], 
             coeftest(spendInvT2, vcov = vcovHC(spendInvT2, type="HC1"))[,2], 
             coeftest(income, vcov = vcovHC(income, type="HC1"))[,2], 
             coeftest(enterprises, vcov = vcovHC(enterprises, type="HC1"))[,2])

pvals1 <- list(coeftest(propR9395F, vcov = vcovHC(propR9395F, type="HC1"))[,4], coeftest(propA9395F, vcov = vcovHC(propA9395F, type="HC1"))[,4], 
               coeftest(spendInvT2, vcov = vcovHC(spendInvT2, type="HC1"))[,4], 
               coeftest(income, vcov = vcovHC(income, type="HC1"))[,4], 
               coeftest(enterprises, vcov = vcovHC(enterprises, type="HC1"))[,4])


stargazer(propR9395F, propA9395F,spendInvT2, income, enterprises, se=ses1, p=pvals1, 
          style = 'APSR', digits=2,
          omit=c("GermanProvince", "Constant", "PctUrb1948", "SharePrzemysl1939", "ShareOver100", "DistPow1950", "DistLandBorder", "distrail48", "TotPop48"), omit.stat=c("rsq","ser","f"), 
          covariate.labels = c('Migrant diversity','Migrant share',  "Property tax revenue"),
          add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes", "Yes", "Yes"), 
          c("Province Fixed Effects", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes")), 
          column.labels=c("Property Tax Rate", "ln(Property Tax Revenue)", "ln(Income Tax)", "ln(Enterprises)"), 
          star.cutoffs=c(0.1, 0.05, 0.01), notes = c("* p<0.1; ** p<0.05; *** p<0.01"), notes.append = F)  


###############################################################################
############ Table A17: Diversity using four groups + group dummies ###########
###############################################################################

dat$DivTot<-1-(dat$PctUSSR^2+dat$PctCP^2+dat$PctOther^2+dat$PctAut^2)
dat$shareLargest<-rep(NA, nrow(dat))
for (i in 1:nrow(dat)){
  dat$shareLargest[i]<-max(dat$PctUSSR[i], dat$PctCP[i], dat$PctOther[i], dat$PctAut[i])
}

### Factor for which group is largest
dat$whichLargest<-rep(NA, nrow(dat))
dat$whichLargest[dat$shareLargest==dat$PctUSSR]<-"USSR"
dat$whichLargest[dat$shareLargest==dat$PctCP]<-"CP"
dat$whichLargest[dat$shareLargest==dat$PctOther]<-"Other"
dat$whichLargest[dat$shareLargest==dat$PctAut]<-"Aut"
dat$whichLargest<-relevel(as.factor(dat$whichLargest), ref="Aut")


inc95AltDiv<-lm(LnpersIncTax1995pc ~ divMig  + whichLargest +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanProvince, data=dat)
ent95AltDiv<-lm(LnPodmPryw1995perthous ~ divMig + whichLargest + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanProvince, data=dat)
inc98AltDiv<-lm(LnpersIncTax1998pc ~ divMig  + whichLargest +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanProvince, data=dat)
ent98AltDiv<-lm(LnPodmPryw1998perthous ~ divMig + whichLargest + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanProvince, data=dat)
inc00AltDiv<-lm(LnpersIncTax2000pc ~ divMig  + whichLargest +PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanProvince, data=dat)
ent00AltDiv<-lm(LnPodmPryw2000perthous ~ divMig + whichLargest + PctUrb1948+ SharePrzemysl1939+ ShareOver100  +log(TotPop48) + DistPow1950 +DistLandBorder+ distrail48 +GermanProvince, data=dat)


ses1 <- list(coeftest(inc95AltDiv, vcov = vcovHC(inc95AltDiv, type="HC1"))[,2], coeftest(inc98AltDiv, vcov = vcovHC(inc98AltDiv, type="HC1"))[,2], 
             coeftest(inc00AltDiv, vcov = vcovHC(inc00AltDiv, type="HC1"))[,2], 
             coeftest(ent95AltDiv, vcov = vcovHC(ent95AltDiv, type="HC1"))[,2], 
             coeftest(ent98AltDiv, vcov = vcovHC(ent98AltDiv, type="HC1"))[,2], coeftest(ent00AltDiv, vcov = vcovHC(ent00AltDiv, type="HC1"))[,2])

pvals1 <- list(coeftest(inc95AltDiv, vcov = vcovHC(inc95AltDiv, type="HC1"))[,4], coeftest(inc98AltDiv, vcov = vcovHC(inc98AltDiv, type="HC1"))[,4],  coeftest(inc00AltDiv, vcov = vcovHC(inc00AltDiv, type="HC1"))[,4], 
               coeftest(ent95AltDiv, vcov = vcovHC(ent95AltDiv, type="HC1"))[,4], coeftest(ent98AltDiv, vcov = vcovHC(ent98AltDiv, type="HC1"))[,4], coeftest(ent00AltDiv, vcov = vcovHC(ent00AltDiv, type="HC1"))[,4])



stargazer(inc95AltDiv,inc98AltDiv, inc00AltDiv, ent95AltDiv, ent98AltDiv, ent00AltDiv, se=ses1, p=pvals1, style="APSR", 
          digits=2, 
          omit=c("GermanProvince", "Constant", "DistLandBorder", "TotPop48", "DistPow1950", "PctUrb1948", "ShareOver100", "distrail48", "SharePrzemysl1939"), 
          omit.stat=c("rsq","ser","f"), 
          add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes"), c("Province FE", "Yes", "Yes", "Yes","Yes")), 
          dep.var.labels.include=TRUE, 
          covariate.labels = c("Migrant diversity", "Largest group: Central Poland","Largest group: Abroad",
                               "Largest group: Kresy"), notes.append = F)


################################################
############ Table A.18: Human capital #########
################################################

edu78b<-lm(ShareWyzsze78 ~  divMig +shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 +DistPow1950+ DistLandBorder + distrail48+GermanProvince, data=dat)
edu88b<-lm(ShareWyzsze88 ~   divMig +shareMigrants + PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 +DistPow1950+ DistLandBorder + distrail48+GermanProvince, data=dat)
edu02b<-lm(ShareWyzsze02 ~    divMig +shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 +DistPow1950+ DistLandBorder + distrail48+GermanProvince, data=dat)
eduDiv1988<-lm(EduDiv88 ~    divMig +shareMigrants+ PctUrb1948+ SharePrzemysl1939 + ShareOver100  +LnTotPop48 +DistPow1950+ DistLandBorder + distrail48+GermanProvince, data=dat)

ses1 <- list(coeftest(edu78b, vcov = vcovHC(edu78b, type="HC1"))[,2], coeftest(edu88b, vcov = vcovHC(edu88b, type="HC1"))[,2], 
             coeftest(edu02b, vcov = vcovHC(edu02b, type="HC1"))[,2], 
             coeftest(eduDiv1988, vcov = vcovHC(eduDiv1988, type="HC1"))[,2])

pvals1 <- list(coeftest(edu78b, vcov = vcovHC(edu78b, type="HC1"))[,4], coeftest(edu88b, vcov = vcovHC(edu88b, type="HC1"))[,4], 
               coeftest(edu02b, vcov = vcovHC(edu02b, type="HC1"))[,4], 
               coeftest(eduDiv1988, vcov = vcovHC(eduDiv1988, type="HC1"))[,4])


stargazer(edu78b,edu88b, edu02b, eduDiv1988, se=ses1, p=pvals1, style="APSR", 
          digits=2, 
          omit=c("GermanProvince", "Constant", "DistLandBorder", "TotPop48", "DistPow1950", "PctUrb1948", "ShareOver100", "distrail48", "SharePrzemysl1939"), 
          omit.stat=c("rsq","ser","f"), add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes"), c("Province FE", "Yes", "Yes", "Yes","Yes")), dep.var.labels.include=TRUE, 
          covariate.labels = c('Migrant diversity', "Migrant share"), notes.append = F)



#################################################
############ Table A.19: Sorting  ###############
#################################################


frombirth<-lm(PtNotFromBirth ~ divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanProvince, data=dat)
arrived70s<-lm(PtArrived7178 ~divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanProvince, data=dat)
arrived80s<-lm(PtArrived8188 ~divMig + shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanProvince, data=dat)
inc95Sort<-lm(LnpersIncTax1995pc ~ divMig + PtArrived8188+PtArrived7178+shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanProvince, data=dat)
ent95Sort<-lm(LnPodmPryw1995perthous ~ divMig + PtArrived8188+PtArrived7178+shareMigrants+PctUrb1948+ SharePrzemysl1939 + ShareOver100  +log(TotPop48) +DistPow1950+ DistLandBorder + distrail48+GermanProvince, data=dat)


ses1 <- list(coeftest(frombirth, vcov = vcovHC(frombirth, type="HC1"))[,2], coeftest(arrived70s, vcov = vcovHC(arrived70s, type="HC1"))[,2], 
             coeftest(arrived80s, vcov = vcovHC(arrived80s, type="HC1"))[,2], 
             coeftest(inc95Sort, vcov = vcovHC(inc95Sort, type="HC1"))[,2], coeftest(ent95Sort, vcov = vcovHC(ent95Sort, type="HC1"))[,2])

pvals1 <- list(coeftest(frombirth, vcov = vcovHC(frombirth, type="HC1"))[,4], coeftest(arrived70s, vcov = vcovHC(arrived70s, type="HC1"))[,4], 
               coeftest(arrived80s, vcov = vcovHC(arrived80s, type="HC1"))[,4], 
               coeftest(inc95Sort, vcov = vcovHC(inc95Sort, type="HC1"))[,4], coeftest(ent95Sort, vcov = vcovHC(ent95Sort, type="HC1"))[,4])

stargazer(frombirth, arrived70s, arrived80s, inc95Sort, ent95Sort, se=ses1, p=pvals1, style="APSR", 
          digits=2, omit=c("GermanProvince", "Constant", "DistLandBorder", "TotPop48", "DistPow1950", "PctUrb1948", "ShareOver100", "distrail48"), 
          omit.stat=c("rsq","ser","f"), add.lines=list(c("Covariates", "Yes", "Yes", "Yes","Yes", "Yes","Yes"), c("Province FE", "Yes", "Yes", "Yes","Yes", "Yes","Yes")), dep.var.labels.include=TRUE, 
          covariate.labels = c('Migrant diversity', "Arrived 1980s", "Arrived 1970s", "Migrant share", "Share in industry"),
          column.labels=c("Not living from birth", "Arrived (1970s)", "Arrived (1980s)"), column.separate=c(3,1,1), notes.append = F)



